clear all;
close all;

%  =======================================================================================================================================================
% 
%  Code Description: 
%  This codefile constructs the dataset underlying the Falato et al. panel regressions presented in Section 3.3.2 (Direct Evidence on Cross-Fund Contagion)
%  and Table 5.
% 
%  =======================================================================================================================================================
% 
%  Major output:
%  - dataset underlying the Falato et al. panel regressions
%  =======================================================================================================================================================
% 
%  General disclaimer:
%  This file directory produces replication code for "Connected Funds". 
%  Because we cannot share the underlying data provided by the Bundesbank's Research Data and Service Centre (RDSC) and other subscription data sources, 
%  we have included pseudo data to show how the raw data are formatted.  
%  Other researchers can go through a similar approval and subscription process to obtain the underlying data. (2023-04-06)
% 
%  =======================================================================================================================================================


%% Set project directory

projectPath = 'C:\ConnectedFunds_Codebase\'

% Add functions folders
addpath(strcat(projectPath, 'Code\matlab_functions'));


cd 'C:\ConnectedFunds_Codebase\Code\'

%  ===================================== Compute peer flow pressure variables for equities, bonds and fund shares =================================================


% Load fund data
load(strcat(projectPath, 'Data\IFS\IFS_All_CleanData'));

% Clean fund data
IFS_All(IFS_All.artmittel == 4, :)       = [];
IFS_All(IFS_All.artmittel == 6, :)       = [];
IFS_All(IFS_All.artmittel > 7, :)        = [];
IFS_All(isnan(IFS_All.FONDSVERM),:)      = [];
IFS_All(IFS_All.FONDSVERM<1000,:)        = [];
IFS_All(isnan(IFS_All.ReturnDiv),:)      = [];
IFS_All(abs(IFS_All.ReturnDiv)>0.8,:)    = [];
IFS_All(isnan(IFS_All.Netflows),:)       = [];
IFS_All(abs(IFS_All.Netflows)>0.8,:)     = [];
IFS_All(isnan(IFS_All.Netflows_lag),:)   = [];
IFS_All(abs(IFS_All.Netflows_lag)>0.8,:) = [];
IFS_All(IFS_All.etf == 1, :)             = [];

Results_IFS_All = [];

% Loop over time and build dataset
for t = Tmonth - 1 : -1 : 2

    t,
    
    IFS_All_t = IFS_All(IFS_All.DATUM==umonth(t),:);
    
    if isempty(IFS_All_t) == 0

        IFS_All_t = sortrows(IFS_All_t,'ISIN','ascend');

        % Get funds' lagged holdings
        load(strcat(projectPath, 'Data\IFS\mat\IFS_Holdings_',num2str(umonth(t-1)),'.mat'));
        IFS_Holdings(IFS_Holdings.AMOUNT<=0,:) = [];
        IFS_Holdings_1 = IFS_Holdings;
        clear idx IFS_Holdings;

        % Get funds' contemporary holdings
        load(strcat(projectPath, 'Data\IFS\mat\IFS_Holdings_',num2str(umonth(t)),'.mat'));
        IFS_Holdings(IFS_Holdings.AMOUNT<=0,:) = [];
        IFS_Holdings_2 = IFS_Holdings;
        clear idx IFS_Holdings;

        ufund =  intersect(intersect(IFS_Holdings_1.ISIN,IFS_Holdings_2.ISIN),IFS_All_t.ISIN);

        % Keep only funds that are active in both t-1 and t and for which we have IFS data on fund characteristics.
        [~,idx1_a]                      = ismember(IFS_Holdings_1.ISIN,ufund);
        IFS_Holdings_1(idx1_a == 0, : ) = [];
        [~,idx2_a]                      = ismember(IFS_Holdings_2.ISIN,ufund);
        IFS_Holdings_2(idx2_a == 0, : ) = [];
        clear idx*

        % Compute cross-fund network
        [~,idx]                 = ismember(IFS_Holdings_1.SECCODE,ufund);
        IFS_Holdings_IF_1       = IFS_Holdings_1(idx>0,:);
        IFS_Holdings_1(idx>0,:) = [];
        [~,idx]                 = ismember(IFS_Holdings_2.SECCODE,ufund);
        IFS_Holdings_IF_2       = IFS_Holdings_2(idx>0,:);
        IFS_Holdings_2(idx>0,:) = [];

        uasset = unique([IFS_Holdings_1.SECCODE; IFS_Holdings_2.SECCODE]);

        [~,idx1_a] = ismember(IFS_Holdings_IF_1.ISIN,ufund);
        [~,idx1_b] = ismember(IFS_Holdings_IF_1.SECCODE,ufund);

        Network_Fund_1          = accumarray([idx1_a idx1_b],IFS_Holdings_IF_1.AMOUNT,[length(ufund) length(ufund)]);
        Network_Number_Fund_1   = accumarray([idx1_a idx1_b],IFS_Holdings_IF_1.SECNOMUN,[length(ufund) length(ufund)]);

        [~,idx2_a] = ismember(IFS_Holdings_IF_2.ISIN,ufund);
        [~,idx2_b] = ismember(IFS_Holdings_IF_2.SECCODE,ufund);

        Network_Fund_2          = accumarray([idx2_a idx2_b],IFS_Holdings_IF_2.AMOUNT,[length(ufund) length(ufund)]);
        Network_Number_Fund_2   = accumarray([idx2_a idx2_b],IFS_Holdings_IF_2.SECNOMUN,[length(ufund) length(ufund)]);

        clear idx*

        % Compute cross-asset network
        [~,idx1_a] = ismember(IFS_Holdings_1.ISIN,ufund);
        [~,idx1_b] = ismember(IFS_Holdings_1.SECCODE,uasset);

        Network_1           = accumarray([idx1_a idx1_b],IFS_Holdings_1.AMOUNT,[length(ufund) length(uasset)]);
        Network_Number_1    = accumarray([idx1_a idx1_b],IFS_Holdings_1.SECNOMUN,[length(ufund) length(uasset)]);

        [~,idx2_a] = ismember(IFS_Holdings_2.ISIN,ufund);
        [~,idx2_b] = ismember(IFS_Holdings_2.SECCODE,uasset);

        Network_2           = accumarray([idx2_a idx2_b],IFS_Holdings_2.AMOUNT,[length(ufund) length(uasset)]);
        Network_Number_2    = accumarray([idx2_a idx2_b],IFS_Holdings_2.SECNOMUN,[length(ufund) length(uasset)]);

        clear IFS_Holdings*

        Overlap = ComputeSimilarity_Cosine(Network_1./repmat(sum(Network_1')',1,size(Network_1,2)));

        % Load security information from CSDB
        load(strcat(projectPath, 'Data\CSDB\mat\CSDB_', num2str(umonth(t)), '.mat'))

        [~,idx] = ismember(uasset,CSDB.ISIN);
        AssetType_num = zeros(length(uasset),1);
        
        if umonth(t) > 201301
            AssetType_num(idx>0) =  1* startsWith(CSDB.ESA_INS_2010(idx(idx>0)),"F_3") + ... % bond
                2* startsWith(CSDB.ESA_INS_2010(idx(idx>0)),"F_51") + ... % equity
                3* startsWith(CSDB.ESA_INS_2010(idx>0),"F_52");    % Fund share
        else
            AssetType_num(idx>0) =  1* startsWith(CSDB.ESA_INS_1995(idx(idx>0)),"F.3") + ... % bond
                2* startsWith(CSDB.ESA_INS_1995(idx(idx>0)),"F.51") + ... % equity
                3* startsWith(CSDB.ESA_INS_1995(idx>0),"F.52");    % Fund share
        end

        [~,idx] = ismember(uasset,ufund);
        AssetType_num(idx>0) = 3;

        clear idx* CSDB IFS_Holdings*;

        % Focus on bond holdings, equity holdings, and German cross-fund holdings only
        i = find(AssetType_num < 1 | AssetType_num > 2);

        AssetType_num(i)        = [];
        Network_1(:,i)          = [];
        Network_Number_1(:,i)   = [];
        Network_2(:,i)          = [];
        Network_Number_2(:,i)   = [];
        uasset(i)               = [];
        clear i;

        TotalHoldings_1 = sum(Network_1')' + sum(Network_Fund_1')';
        i = find(TotalHoldings_1 == 0);
        
        Network_1(i,:)          = [];
        Network_Number_1(i,:)   = [];
        Network_Fund_1(i,:)     = []; 
        Network_Fund_1(:,i)     = [];
        Network_2(i,:)          = [];
        Network_Number_2(i,:)   = [];
        Network_Fund_2(i,:)     = []; 
        Network_Fund_2(:,i)     = [];

        [~,idx]             = ismember(IFS_All_t.ISIN,ufund(i));
        IFS_All_t(idx>0,:)  = [];
        ufund(i)            = [];
        Overlap(i,:)        = []; 
        Overlap(:,i)        = [];

        TotalHoldings_1 = sum(Network_1')' + sum(Network_Fund_1')';

        clear i idx*

        % Build weighted cross-asset network
        
        ... Bonds
        i = find(AssetType_num == 1);
        WNetwork_Bond = Network_1(:,i)./repmat(TotalHoldings_1,1,length(i));
        WNetwork_Bond(isnan(WNetwork_Bond)) = 0; WNetwork_Bond(isinf(WNetwork_Bond)) = 0;
        Network_Bond_1  = Network_Number_1(:,i);
        Network_Bond_2  = Network_Number_2(:,i);
        uasset_Bond = uasset(i);

        ... Equities
        i = find(AssetType_num == 2);
        WNetwork_Equity = Network_1(:,i)./repmat(TotalHoldings_1,1,length(i));
        WNetwork_Equity(isnan(WNetwork_Equity)) = 0; WNetwork_Equity(isinf(WNetwork_Equity)) = 0;
        Network_Equity_1  = Network_Number_1(:,i);
        Network_Equity_2  = Network_Number_2(:,i);
        uasset_Equity = uasset(i);

        ... Fund shares
        WNetwork_Fund = Network_Fund_1./repmat(TotalHoldings_1,1,length(ufund));
        WNetwork_Fund(isnan(WNetwork_Fund)) = 0; WNetwork_Fund(isinf(WNetwork_Fund)) = 0;

        ufund = table(cellstr(ufund));
        ufund.Properties.VariableNames{1} = 'ISIN';

        ifs_data_t = join(ufund,IFS_All_t);

        k_outflows = find(ifs_data_t.Netflows <= -0.05);
        k_inflows  = find(ifs_data_t.Netflows >= 0.05);

        clear IFS_All_t AssetType_num;

        % Compute bond pressure
        FlowInducedSales_Bond = sum(max(0, - (Network_Bond_2(k_outflows,:) - Network_Bond_1(k_outflows,:))));
        FlowInducedBuys_Bond  = sum(max(0,   (Network_Bond_2(k_inflows,:) - Network_Bond_1(k_inflows,:))));
        OfferingValue_Bond    = sum(Network_Bond_1);
        OfferingValue_Bond(OfferingValue_Bond == 0 ) = NaN;
        FlowPressure_Bond     = (FlowInducedBuys_Bond - FlowInducedSales_Bond) ./ OfferingValue_Bond;
        FlowPressure_Bond(isnan(FlowPressure_Bond)) = 0;
        FlowPressure_Buys_Bond     = (FlowInducedBuys_Bond) ./ OfferingValue_Bond;
        FlowPressure_Buys_Bond(isnan(FlowPressure_Buys_Bond)) = 0;
        FlowPressure_Sales_Bond     = (FlowInducedSales_Bond) ./ OfferingValue_Bond;
        FlowPressure_Sales_Bond(isnan(FlowPressure_Sales_Bond)) = 0;
        PeerFlowPressure_Bond  = WNetwork_Bond * FlowPressure_Bond';
        PeerFlowPressure_Buys_Bond   = WNetwork_Bond * FlowPressure_Buys_Bond';
        PeerFlowPressure_Sales_Bond  = WNetwork_Bond * FlowPressure_Sales_Bond';

        for k = 1:length(k_outflows)
            OwnPressure =  max(0, - (Network_Bond_2(k_outflows(k),:) - Network_Bond_1(k_outflows(k),:))) ./ OfferingValue_Bond;
            OwnPressure(isnan(OwnPressure)) = 0;
            PeerFlowPressure_Bond(k_outflows(k))       = PeerFlowPressure_Bond(k_outflows(k)) + (WNetwork_Bond(k_outflows(k),:) *OwnPressure');
            PeerFlowPressure_Sales_Bond(k_outflows(k)) = PeerFlowPressure_Sales_Bond(k_outflows(k)) - (WNetwork_Bond(k_outflows(k),:) *OwnPressure');
        end

        for k = 1:length(k_inflows)
            OwnPressure =  max(0, (Network_Bond_2(k_inflows(k),:) - Network_Bond_1(k_inflows(k),:))) ./ OfferingValue_Bond;
            OwnPressure(isnan(OwnPressure)) = 0;
            PeerFlowPressure_Bond(k_inflows(k)) = PeerFlowPressure_Bond(k_inflows(k)) - (WNetwork_Bond(k_inflows(k),:) *OwnPressure');
            PeerFlowPressure_Buys_Bond(k_inflows(k)) = PeerFlowPressure_Buys_Bond(k_inflows(k)) - (WNetwork_Bond(k_inflows(k),:) *OwnPressure');
        end

        ifs_data_t.PeerFlowPressure_Bond = PeerFlowPressure_Bond;
        ifs_data_t.PeerFlowPressure_Sales_Bond = PeerFlowPressure_Sales_Bond;
        ifs_data_t.PeerFlowPressure_Buys_Bond = PeerFlowPressure_Buys_Bond;

        % Compute equity pressure
        FlowInducedSales_Equity = sum(max(0, - (Network_Equity_2(k_outflows,:) - Network_Equity_1(k_outflows,:))));
        FlowInducedBuys_Equity  = sum(max(0,   (Network_Equity_2(k_inflows,:) - Network_Equity_1(k_inflows,:))));
        OfferingValue_Equity    = sum(Network_Equity_1);
        OfferingValue_Equity(OfferingValue_Equity == 0 ) = NaN;
        FlowPressure_Equity     = (FlowInducedBuys_Equity - FlowInducedSales_Equity) ./ OfferingValue_Equity;
        FlowPressure_Equity(isnan(FlowPressure_Equity)) = 0;
        FlowPressure_Buys_Equity     = (FlowInducedBuys_Equity) ./ OfferingValue_Equity;
        FlowPressure_Buys_Equity(isnan(FlowPressure_Buys_Equity)) = 0;
        FlowPressure_Sales_Equity     = (FlowInducedSales_Equity) ./ OfferingValue_Equity;
        FlowPressure_Sales_Equity(isnan(FlowPressure_Sales_Equity)) = 0;
        PeerFlowPressure_Equity  = WNetwork_Equity * FlowPressure_Equity';
        PeerFlowPressure_Buys_Equity   = WNetwork_Equity * FlowPressure_Buys_Equity';
        PeerFlowPressure_Sales_Equity  = WNetwork_Equity * FlowPressure_Sales_Equity';

        for k = 1:length(k_outflows)
            OwnPressure =  max(0, - (Network_Equity_2(k_outflows(k),:) - Network_Equity_1(k_outflows(k),:))) ./ OfferingValue_Equity;
            OwnPressure(isnan(OwnPressure)) = 0;
            PeerFlowPressure_Equity(k_outflows(k))       = PeerFlowPressure_Equity(k_outflows(k)) + (WNetwork_Equity(k_outflows(k),:) *OwnPressure');
            PeerFlowPressure_Sales_Equity(k_outflows(k)) = PeerFlowPressure_Sales_Equity(k_outflows(k)) - (WNetwork_Equity(k_outflows(k),:) *OwnPressure');
        end

        for k = 1:length(k_inflows)
            OwnPressure =  max(0, (Network_Equity_2(k_inflows(k),:) - Network_Equity_1(k_inflows(k),:))) ./ OfferingValue_Equity;
            OwnPressure(isnan(OwnPressure)) = 0;
            PeerFlowPressure_Equity(k_inflows(k)) = PeerFlowPressure_Equity(k_inflows(k)) - (WNetwork_Equity(k_inflows(k),:) *OwnPressure');
            PeerFlowPressure_Buys_Equity(k_inflows(k)) = PeerFlowPressure_Buys_Equity(k_inflows(k)) - (WNetwork_Equity(k_inflows(k),:) *OwnPressure');
        end

        ifs_data_t.PeerFlowPressure_Equity = PeerFlowPressure_Equity;
        ifs_data_t.PeerFlowPressure_Sales_Equity = PeerFlowPressure_Sales_Equity;
        ifs_data_t.PeerFlowPressure_Buys_Equity = PeerFlowPressure_Buys_Equity;

        % Compute fund share pressure
        FlowInducedSales_Fund = sum(max(0, - (Network_Fund_2(k_outflows,:) - Network_Fund_1(k_outflows,:))));
        FlowInducedBuys_Fund  = sum(max(0,   (Network_Fund_2(k_inflows,:) - Network_Fund_1(k_inflows,:))));
        OfferingValue_Fund    = sum(Network_Fund_1);
        OfferingValue_Fund(OfferingValue_Fund == 0 ) = NaN;
        FlowPressure_Fund     = (FlowInducedBuys_Fund - FlowInducedSales_Fund) ./ OfferingValue_Fund;
        FlowPressure_Fund(isnan(FlowPressure_Fund)) = 0;
        FlowPressure_Buys_Fund     = (FlowInducedBuys_Fund) ./ OfferingValue_Fund;
        FlowPressure_Buys_Fund(isnan(FlowPressure_Buys_Fund)) = 0;
        FlowPressure_Sales_Fund     = (FlowInducedSales_Fund) ./ OfferingValue_Fund;
        FlowPressure_Sales_Fund(isnan(FlowPressure_Sales_Fund)) = 0;
        PeerFlowPressure_Fund  = WNetwork_Fund * FlowPressure_Fund';
        PeerFlowPressure_Buys_Fund   = WNetwork_Fund * FlowPressure_Buys_Fund';
        PeerFlowPressure_Sales_Fund  = WNetwork_Fund * FlowPressure_Sales_Fund';

        for k = 1:length(k_outflows)
            OwnPressure =  max(0, - (Network_Fund_2(k_outflows(k),:) - Network_Fund_1(k_outflows(k),:))) ./ OfferingValue_Fund;
            OwnPressure(isnan(OwnPressure)) = 0;
            PeerFlowPressure_Fund(k_outflows(k))       = PeerFlowPressure_Fund(k_outflows(k)) + (WNetwork_Fund(k_outflows(k),:) *OwnPressure');
            PeerFlowPressure_Sales_Fund(k_outflows(k)) = PeerFlowPressure_Sales_Fund(k_outflows(k)) - (WNetwork_Fund(k_outflows(k),:) *OwnPressure');
        end

        for k = 1:length(k_inflows)
            OwnPressure =  max(0, (Network_Fund_2(k_inflows(k),:) - Network_Fund_1(k_inflows(k),:))) ./ OfferingValue_Fund;
            OwnPressure(isnan(OwnPressure)) = 0;
            PeerFlowPressure_Fund(k_inflows(k)) = PeerFlowPressure_Fund(k_inflows(k)) - (WNetwork_Fund(k_inflows(k),:) *OwnPressure');
            PeerFlowPressure_Buys_Fund(k_inflows(k)) = PeerFlowPressure_Buys_Fund(k_inflows(k)) - (WNetwork_Fund(k_inflows(k),:) *OwnPressure');
        end

        ifs_data_t.PeerFlowPressure_Fund       = PeerFlowPressure_Fund;
        ifs_data_t.PeerFlowPressure_Sales_Fund = PeerFlowPressure_Sales_Fund;
        ifs_data_t.PeerFlowPressure_Buys_Fund  = PeerFlowPressure_Buys_Fund;

        % Compute Peerflow_pressure from fund investments (extension of falato et al.)
        ifs_data_t.PeerFlowPressure_Fund1       = WNetwork_Fund * (PeerFlowPressure_Equity + PeerFlowPressure_Bond + PeerFlowPressure_Fund);
        ifs_data_t.PeerFlowPressure_Sales_Fund1 = WNetwork_Fund * (PeerFlowPressure_Sales_Equity + PeerFlowPressure_Sales_Bond + PeerFlowPressure_Sales_Fund);
        ifs_data_t.PeerFlowPressure_Buys_Fund1  = WNetwork_Fund * (PeerFlowPressure_Buys_Equity + PeerFlowPressure_Buys_Bond + PeerFlowPressure_Buys_Fund);

        ifs_data_t.PeerFlowPressure_Fund2       = WNetwork_Fund * (PeerFlowPressure_Equity );
        ifs_data_t.PeerFlowPressure_Sales_Fund2 = WNetwork_Fund * (PeerFlowPressure_Sales_Equity );
        ifs_data_t.PeerFlowPressure_Buys_Fund2  = WNetwork_Fund * (PeerFlowPressure_Buys_Equity );

        ifs_data_t.PeerFlowPressure_Fund3       = WNetwork_Fund * (PeerFlowPressure_Bond );
        ifs_data_t.PeerFlowPressure_Sales_Fund3 = WNetwork_Fund * (PeerFlowPressure_Sales_Bond );
        ifs_data_t.PeerFlowPressure_Buys_Fund3  = WNetwork_Fund * (PeerFlowPressure_Buys_Bond );

        ifs_data_t.PeerFlowPressure_Fund4       = WNetwork_Fund * (PeerFlowPressure_Fund );
        ifs_data_t.PeerFlowPressure_Sales_Fund4 = WNetwork_Fund * (PeerFlowPressure_Sales_Fund );
        ifs_data_t.PeerFlowPressure_Buys_Fund4  = WNetwork_Fund * (PeerFlowPressure_Buys_Fund );

        if isempty(Results_IFS_All) == 1
            Results_IFS_All  = ifs_data_t;
        else
            Results_IFS_All = [Results_IFS_All; ifs_data_t];
        end

        % Save asset-level results
        date = umonth(t);

        Results_AssetLevel = table(date*ones(length(uasset_Bond)+length(uasset_Equity),1),[uasset_Bond; uasset_Equity],...
            [FlowPressure_Bond'; FlowPressure_Equity'], ...
            [FlowPressure_Buys_Bond'; FlowPressure_Buys_Equity'], ...
            [FlowPressure_Sales_Bond'; FlowPressure_Sales_Equity']);
        Results_AssetLevel.Bond = zeros(height(Results_AssetLevel),1); Results_AssetLevel.Bond(1:length(uasset_Bond)) = 1;
        Results_AssetLevel.Properties.VariableNames{1} = 'DATUM';
        Results_AssetLevel.Properties.VariableNames{2} = 'ISIN';
        Results_AssetLevel.Properties.VariableNames{3} = 'FlowPressure';
        Results_AssetLevel.Properties.VariableNames{4} = 'FlowPressure_Buys';
        Results_AssetLevel.Properties.VariableNames{5} = 'FlowPressure_Sales';

        save(strcat(projectPath, 'Data\FalatoEtAl\tmp_SecLevel_',num2str(date),'.mat'),'Results_AssetLevel');
        
        % Do some housekeeping
        clear Results_AssetLevel;
        clear OwnPressure Peer* Flow* Offering* ifs_data_t IFS_All_t Network_1 Network_2 Network_Number*;

    end
end

% Again, do some housekeeping
keep Results_IFS_All Results_IFS_Matching_Outflows Results_IFS_Matching_Inflows;

% Save dataset
save(strcat(projectPath, 'Data\FalatoEtAl\FalatoEtAl'),'Results_IFS_All');
writetable(Results_IFS_All, strcat(projectPath, 'Data\FalatoEtAl\FalatoEtAl.txt'));


%%  =============================================== Build full asset-level dataset ==============================================================

keep projectPath

load(strcat(projectPath, 'Data\IFS\IFS_All_CleanData.mat'),'umonth');

umonth = umonth(umonth>200910 & umonth <= 202006);

for t = 1:length(umonth)

    load(strcat(projectPath, 'Data\FalatoEtAl\tmp_SecLevel_',num2str(umonth(t)),'.mat'),'Results_AssetLevel');

    if t == 1
        All_Results_AssetLevel = Results_AssetLevel;
    else
        All_Results_AssetLevel = [All_Results_AssetLevel; Results_AssetLevel];
    end

    clear Results_AssetLevel;

end

writetable(All_Results_AssetLevel,strcat(projectPath, 'Data\FalatoEtAl\FalatoEtAl_SecLevel.txt'));